This endpoint allows parsing the content on any page you specify and will return the structured content of the target page, including link URLs, anchors, headings, and textual content.
Note: to use this endpoint, make sure the enable_content_parsing parameter in the Task Post request is set to true.
Instead of ‘login’ and ‘password’ use your credentials from https://app.dataforseo.com/api-dashboard
# Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-dashboard
login="login"
password="password"
# Build the HTTP Basic token. The credentials are passed as printf arguments
# (not as the format string) so a '%' or '\' in them is encoded literally, and
# tr strips the line wraps some base64 implementations insert on long input.
cred="$(printf '%s:%s' "${login}" "${password}" | base64 | tr -d '\n')"
# Each continued line ends with a backslash so the headers and the body are
# passed to the same curl invocation. The JSON body is single-quoted so its
# inner double quotes reach curl unchanged.
curl --location --request POST "https://api.dataforseo.com/v3/on_page/content_parsing" \
--header "Authorization: Basic ${cred}" \
--header "Content-Type: application/json" \
--data-raw '[
    {
        "url": "https://www.fujielectric.com/",
        "id": "11161551-1535-0216-0000-500b3f307f92"
    }
]'
<?php
// You can download this file from here https://cdn.dataforseo.com/v3/examples/php/php_RestClient.zip
require('RestClient.php');
$api_url = 'https://api.dataforseo.com/';
// Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-dashboard
$client = new RestClient($api_url, null, 'login', 'password');
$post_array = array();
// simple way to get a result
$post_array[] = array(
    "url" => "https://www.fujielectric.com/",
    "id" => "11161551-1535-0216-0000-500b3f307f92",
);
try {
    // POST /v3/on_page/content_parsing
    // the full list of possible parameters is available in documentation
    $result = $client->post('/v3/on_page/content_parsing', $post_array);
    print_r($result);
    // do something with post result
} catch (RestClientException $e) {
    // Print each diagnostic on its own line; "\n" is a newline escape.
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print $e->getTraceAsString();
    echo "\n";
}
$client = null;
?>
from random import Random
from client import RestClient
# You can download this file from here https://api.dataforseo.com/v3/_examples/python/_python_Client.zip
# Instead of "login" and "password" use your credentials from https://app.dataforseo.com/api-dashboard
client = RestClient("login", "password")
# Tasks are collected in a dict keyed by their position (0, 1, ...).
post_data = dict()
# simple way to get a result
post_data[len(post_data)] = dict(
    url="https://www.fujielectric.com/",
    id="11161551-1535-0216-0000-500b3f307f92"
)
# POST /v3/on_page/content_parsing
# the full list of possible parameters is available in documentation
response = client.post("/v3/on_page/content_parsing", post_data)
# you can find the full list of the response codes here https://docs.dataforseo.com/v3/appendix/errors
if response["status_code"] == 20000:
    print(response)
    # do something with result
else:
    print("error. Code: %d Message: %s" % (response["status_code"], response["status_message"]))
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
    public static partial class Demos
    {
        /// <summary>
        /// Posts a single content-parsing request to <c>/v3/on_page/content_parsing</c>
        /// and writes either the parsed response or an error line to the console.
        /// </summary>
        /// <returns>A task that completes once the response has been read and printed.</returns>
        public static async Task on_page_content_parsing()
        {
            // Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-dashboard
            var basicToken = Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"));

            // The client, request body and response are disposed when the call completes.
            using (var httpClient = new HttpClient { BaseAddress = new Uri("https://api.dataforseo.com/") })
            {
                httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Basic", basicToken);

                // simple way to get a result
                var postData = new List<object>
                {
                    new
                    {
                        url = "https://www.fujielectric.com/",
                        id = "11161551-1535-0216-0000-500b3f307f92"
                    }
                };

                // POST /v3/on_page/content_parsing
                // the full list of possible parameters is available in documentation
                // The media type is set explicitly; StringContent otherwise defaults to text/plain.
                using (var requestBody = new StringContent(JsonConvert.SerializeObject(postData), Encoding.UTF8, "application/json"))
                using (var taskPostResponse = await httpClient.PostAsync("/v3/on_page/content_parsing", requestBody))
                {
                    var result = JsonConvert.DeserializeObject<dynamic>(await taskPostResponse.Content.ReadAsStringAsync());

                    // An empty body deserializes to null; report it instead of failing on member access.
                    if (result == null)
                    {
                        Console.WriteLine($"error. Empty response body, HTTP status: {(int)taskPostResponse.StatusCode}");
                        return;
                    }

                    // you can find the full list of the response codes here https://docs.dataforseo.com/v3/appendix/errors
                    if (result.status_code == 20000)
                    {
                        // do something with result
                        Console.WriteLine(result);
                    }
                    else
                    {
                        Console.WriteLine($"error. Code: {result.status_code} Message: {result.status_message}");
                    }
                }
            }
        }
    }
}
The above command returns JSON structured like this:
All POST data should be sent in the JSON format (UTF-8 encoding). The task setting is done using the POST method. When setting a task, you should send all task parameters in the task array of the generic POST array.
Description of the fields for setting a task:
Field name
Type
Description
url
string
URL of the content to parse; required field
URL of the page to parse
example: https://www.fujielectric.com/
id
string
ID of the task; required field
you can get this ID in the response of the Task POST endpoint; note: the enable_content_parsing parameter in the POST request must be set to true
example: "07131248-1535-0216-1000-17384017ad04"
As a response of the API server, you will receive JSON-encoded data containing a tasks array with the information specific to the set tasks.
Description of the fields in the response array:
Field name
Type
Description
version
string
the current version of the API
status_code
integer
general status code
you can find the full list of the response codes here. Note: we strongly recommend designing a system for handling the related exceptional or error conditions
status_message
string
general informational message
you can find the full list of general informational messages here
time
string
execution time, seconds
cost
float
total tasks cost, USD
tasks_count
integer
the number of tasks in the tasks array
tasks_error
integer
the number of tasks in the tasks array returned with an error
tasks
array
array of tasks
id
string
task identifier; unique task identifier in our system in the UUID format
status_code
integer
status code of the task
generated by DataForSEO; can be within the following range: 10000-60000
you can find the full list of the response codes here
status_message
string
informational message of the task
you can find the full list of general informational messages here
time
string
execution time, seconds
cost
float
cost of the task, USD
result_count
integer
number of elements in the result array
path
array
URL path
data
object
contains the same parameters that you specified in the POST request
result
array
array of results
crawl_progress
string
status of the crawling session
possible values: in_progress, finished
crawl_status
object
details of the crawling session
items_count
integer
number of items in the results array
items
array
items array
‘content_parsing_element’
resource_type
string
type of the returned resource = ‘content_parsing_element’
fetch_time
string
date and time when the content was fetched
example: "2022-11-01 10:02:52 +00:00"
page_content
object
parsed content of the page
header
object
parsed content of the header
primary_content
array
primary content on the page
you can find more information about content priority calculation in this help center article
text
string
content text
url
string
page URL
displayed in case the text is a link anchor
secondary_content
array
secondary content on the page
you can find more information about content priority calculation in this help center article
text
string
content text
url
string
page URL
displayed in case the text is a link anchor
footer
object
parsed content of the footer
primary_content
array
primary content on the page
you can find more information about content priority calculation in this help center article
text
string
content text
url
string
page URL
displayed in case the text is a link anchor
secondary_content
array
secondary content on the page
you can find more information about content priority calculation in this help center article
text
string
content text
url
string
page URL
displayed in case the text is a link anchor
main_topic
array
main topic on the page
you can find more information about topic priority calculation in this help center article
h_title
string
meta title
main_title
string
main title of the block
author
string
content author name
language
string
content language
level
string
HTML level
primary_content
array
primary content on the page
you can find more information about content priority calculation in this help center article
text
string
content text
url
string
page URL
displayed in case the text is a link anchor
secondary_content
array
secondary content on the page
you can find more information about content priority calculation in this help center article
text
string
content text
url
string
page URL
displayed in case the text is a link anchor
secondary_topic
array
secondary topic on the page
you can find more information about topic priority calculation in this help center article
h_title
string
meta title
main_title
string
main title of the block
author
string
content author name
language
string
content language
level
string
HTML level
primary_content
array
primary content on the page
you can find more information about content priority calculation in this help center article
text
string
content text
url
string
page URL
displayed in case the text is a link anchor
secondary_content
array
secondary content on the page
you can find more information about content priority calculation in this help center article
text
string
content text
url
string
page URL
displayed in case the text is a link anchor